
****************************
*sort out relevant variables
****************************

* Session setup: pin the Stata version, move to the posted-data folder,
* open the log for this step and load the 1989-2015 analysis panel.
* The global mypath must be defined by the caller before this file runs.
version 15.1
* Forward slashes are accepted by Stata on Windows, macOS and Linux alike,
* whereas backslash separators only work on Windows.
cd "${mypath}/CHNS_project/01_data/02_posted/"

log using "${mypath}/CHNS_project/03_log_files/03_sorting_out_relevant_variables.log", replace

use analysis1989_2015.dta, clear

*generate parents' id
* The id variables are displayed with %20.0f, i.e. they are expected to be long
* integers. Every variable derived from them is therefore created as double:
* the default storage type (float) cannot represent that many digits exactly,
* which would corrupt both the stored ids and the equality checks below.
format IDind_f IDind_m %20.0f
sort newid

local waves 1989 1991 1993 1997 2000 2004 2006 2009 2011 2015

* father_YYYY: father id reported for the person in wave YYYY, spread over all
* rows of that person (0 is used as a placeholder so that max() picks the id).
foreach num of local waves {
	gen double fa_`num' = IDind_f if wave == `num' & IDind_f != .
	replace fa_`num' = 0 if fa_`num' == .
	bys IDind: egen double father_`num' = max(fa_`num')
}

* mother_YYYY: same construction for the mother id.
foreach num of local waves {
	gen double mo_`num' = IDind_m if wave == `num' & IDind_m != .
	replace mo_`num' = 0 if mo_`num' == .
	bys IDind: egen double mother_`num' = max(mo_`num')
}

* first and last wave in which each person appears
bys IDind: egen min_wave = min(wave)
bys IDind: egen max_wave = max(wave)

* turn the 0 placeholder back into missing
foreach num of local waves {
	replace father_`num' = . if father_`num' == 0
	replace mother_`num' = . if mother_`num' == 0
}

* Fill a missing parent id in an interior wave when the waves directly before
* and after report the same parent and the person was observed both before
* and after that wave. The first and last waves are never filled.
gen double father_id = IDind_f
gen double mother_id = IDind_m

local nwaves : word count `waves'
forvalues i = 2/`=`nwaves' - 1' {
	local prev : word `=`i' - 1' of `waves'
	local cur  : word `i' of `waves'
	local next : word `=`i' + 1' of `waves'
	foreach parent in father mother {
		replace `parent'_id = `parent'_`prev' if mi(`parent'_id) & !mi(`parent'_`prev') & wave == `cur' & ///
		                                   `parent'_`prev' == `parent'_`next' & min_wave < `cur' & max_wave > `cur'
	}
}

* person-wave keys for the parents: parent id followed by the survey year
gen fa_id = string(father_id,"%20.0f") + string(wave) if !mi(father_id)
gen mo_id = string(mother_id,"%20.0f") + string(wave) if !mi(mother_id)
destring fa_id mo_id,replace
format fa_id mo_id %20.0f
drop min_wave

*generate spouse's id
* same person-wave key, built from the spouse id
gen sp_id = string(IDind_s,"%20.0f") + string(wave) if !mi(IDind_s)
destring sp_id,replace
format sp_id %20.0f

*generate ethnicity
* NATIONALITY 1 becomes 0 "Han", 2-20 become 1 "minority", -9 becomes missing.
* Values not named in a rule are copied unchanged by recode.
recode NATIONALITY (2/20 = 1 "minority")(1 = 0 "Han")(-9 = .),gen(ethnicity)
la var ethnicity "ethnicity"
la val ethnicity ethnicity

*generate age
* Missing ages are filled with survey year minus birth year (WEST_DOB_Y).
replace age = wave - WEST_DOB_Y if mi(age)
* NOTE(review): only the single value -3 is blanked; other negative ages, if
* any exist, are kept. Confirm that -3 is the only invalid value in the data.
replace age = . if age == -3

*attach the household asset index (HAI.dta, one row per homeid)
* keep(master match) discards households that exist only in HAI.dta;
* nogenerate means no _merge variable is left behind.
merge m:1 homeid using HAI.dta, keep(master match) nogenerate

*generate relationship
* A5 is only renamed here; its coding is not changed.
rename A5 relation

*generate "living at home" (wasn't in the 1993 survey)
* Step 1: A5E codes 0 and 9 are set to missing (stored in living).
* Step 2: code 1 becomes 1 "living at home", codes 2-6 become 0.
recode A5E (0 9=.),gen(living)
recode living (1 = 1 "living at home")(2/6 = 0 "not living at home"),gen(be_at_home)

*generate household size(hhsize = number of members surveyed + not surveyed)
* hhsize is used where it is available; otherwise the number of rows that
* share the same homeid is used as a fallback.
* NOTE(review): the fallback counts every row with the same homeid. If homeid
* is not wave-specific this counts person-waves, not persons - confirm.
sort homeid IDind
gen count = 1
bys homeid:egen hhsize_addition = total(count)
gen household_size = hhsize
replace household_size = hhsize_addition if mi(household_size)

*generate household size dummy
* The top band is open-ended (6/max): with a fixed upper bound of 16 any
* larger household would be left unrecoded and show up as its raw size
* instead of "six or more".
recode household_size (1/2 = 1 "two or less")(3 = 2 "three")(4 = 3 "four")(5 = 4 "five")(6/max = 5 "six or more"),gen(household_group)
tab household_group,gen(h_group)

*number of household members younger than 16
* a missing age compares as larger than any number, so it yields 0 here
generate be_child = (age < 16)
bysort homeid: egen num_child = total(be_child)

*number of household members older than 65
* members with missing age are counted as not elderly (0)
generate be_elderly = (age > 65 & !mi(age))
bysort homeid: egen num_elderly = total(be_elderly)

*generate agegroup
* Eleven bands with lower bounds 18, 26, 31, 36, 41, 46, 51, 56, 61 and 66.
* The band number is 1 plus the number of lower bounds the age has reached, so
* every non-missing age falls in exactly one band. Ranges written as
* min/17.9, 18/25.9, ... would leave fractional ages such as 17.95 unrecoded.
* Missing ages stay missing.
gen agegroup = cond(mi(age), age, ///
               1 + (age >= 18) + (age >= 26) + (age >= 31) + (age >= 36) + (age >= 41) ///
                 + (age >= 46) + (age >= 51) + (age >= 56) + (age >= 61) + (age >= 66))
la def agegroup 1 "18岁以下" 2 "18-25" 3 "26-30" 4 "31-35" 5 "36-40" 6 "41-45" ///
                7 "46-50" 8 "51-55" 9 "56-60" 10 "61-65" 11 "66岁以上"
la val agegroup agegroup
tab agegroup,gen(ag)

*gender dummy: 1 when GENDER equals 1, 0 for any other non-missing code
generate gender = cond(mi(GENDER), ., GENDER == 1)
label variable gender "gender"
label define gender 1 "male" 0 "female"
label values gender gender

*student status: copy of A13 with code 9 set to missing
recode A13 (9 = .), gen(be_student)
label variable be_student "still at school"
label define be_student 1 "yes" 0 "no"
label values be_student be_student

*hukou dummy: 1 when A8B1 equals 1, 0 for any other non-missing code
generate hukou = cond(mi(A8B1), ., A8B1 == 1)
label variable hukou "hukou"
label define hukou 1 "urban" 0 "rural"
label values hukou hukou

*fill gaps in hukou from each person's own history
*1.a missing wave takes the value of the first later wave that is not missing,
*  but only if that value equals the value of the wave just before the gap.
*2.hukou has been collected since 1993, so 1993 and 2015 are never filled.
preserve
keep IDind wave hukou
reshape wide hukou, i(IDind) j(wave)

local hk_waves 1993 1997 2000 2004 2006 2009 2011 2015
local n_hk : word count `hk_waves'

* Targets are processed in chronological order, so a wave filled in one pass
* can serve as the "wave before" in the next pass.
forvalues t = 2/`=`n_hk' - 1' {
	local before : word `=`t' - 1' of `hk_waves'
	local target : word `t' of `hk_waves'
	* the macro below collects: target missing and all waves scanned so far missing
	local gap "mi(hukou`target')"
	forvalues s = `=`t' + 1'/`n_hk' {
		local donor : word `s' of `hk_waves'
		replace hukou`target' = hukou`donor' if hukou`before' == hukou`donor' & ///
		                                        `gap' & !mi(hukou`donor')
		local gap "`gap' & mi(hukou`donor')"
	}
}

* back to long form, rebuild the person-wave key (IDind followed by the year)
* and store the result for merging
reshape long hukou, i(IDind) j(wave)
tostring IDind, format(%100.0g) replace
generate newid = IDind + string(wave)
destring newid, replace
format newid %20.0f
rename hukou hukou_interpo
drop IDind wave
save hukou_interpo.dta, replace
restore

* bring the filled series back; rows that exist only in the saved file are dropped
merge 1:1 newid using hukou_interpo.dta, keep(master match) nogenerate
label variable hukou_interpo "hukou(supplemented)"
label define hukou_interpo 0 "rural" 1 "urban" 
label values hukou_interpo hukou_interpo

*identify migrants at destination (hukou is available for 1993-2011 and later):
*1 T2 == 1: surveyed in an urban site;
*2 hukou_interpo == 0: holds a rural hukou;
*the flag is left missing when hukou_interpo is missing
generate be_migrant = cond(mi(hukou_interpo), ., T2 == 1 & hukou_interpo == 0)
label variable be_migrant "be migrant(from the countryside)"
label define be_migrant 0 "No" 1 "Yes" 
label values be_migrant be_migrant

*three-way migrant status; missing when be_migrant is missing or T2 is neither 1 nor 2
generate m_group = cond(be_migrant == 1, 1, ///
                   cond(T2 == 2 & be_migrant == 0, 2, ///
                   cond(T2 == 1 & be_migrant == 0, 3, .)))
label variable m_group "migrant status"
label define m_group 1 "migrants" 2 "rural residents" 3 "urban residents"
label values m_group m_group

*dummies per migrant group; the dummy for group 1 (migrants) is dropped
tabulate m_group, gen(mg)
drop mg1

*exclude student, military and police
drop if be_student == 1 | inlist(B4,8,9)

*generate marital status
* codebook only prints the distribution of A8 to the log.
codebook A8,tab(999)
* A8 is cleaned in place: values outside 1-5 (including missing codes) become
* missing; values strictly between 1 and 2 are set to 1 and values strictly
* between 4 and 5 are set to 4.
replace A8 = . if A8 < 1 | A8 > 5
replace A8 = 1 if A8 > 1 & A8 < 2
replace A8 = 4 if A8 > 4 & A8 < 5
gen marital = A8
la var marital "marital status"
la def marital 1 "never married" 2 "married" 3 "divorced" 4 "widowed" 5 "separated"
la val marital marital

*generate marital status(including "divorced,widowed,separated")
*All respondents who answered questions about their marital experiences were also identified as married.
* marital 1 becomes 0, marital 2-5 become 1.
recode marital (1 =0 "never married")(2/5 =1 "married"),gen(marital_status)
* The replaces below only touch rows where marital_status is still missing:
* a recorded spouse id, or an answer in one of the S-items, sets it to 1.
* NOTE(review): the excluded ranges (999904-999999, 999911-999999, 999999)
* look like missing/not-applicable codes - confirm against the codebook.
replace marital_status = 1 if !mi(IDind_s) & mi(marital_status)
replace marital_status = 1 if inrange(S1,2,4) & mi(marital_status)
replace marital_status = 1 if !inrange(S2,999904,999999) & !mi(S2) & mi(marital_status) 
replace marital_status = 1 if inrange(S3,0,1) & mi(marital_status)
replace marital_status = 1 if !inrange(S5,999911,999999) & !mi(S5) & mi(marital_status)
replace marital_status = 1 if (!mi(S34) | !mi(S35) | (S36 != 999999 & !mi(S36))| !mi(S37) | !mi(S39)) & mi(marital_status)
la var marital_status "marriage(including divorced,widowed,separated)"
la val marital_status marital_status

*carry "ever married" forward: once a person is coded 1 in a wave, a missing
*value in the next wave is set to 1 as well (and so on down the waves)
preserve
keep IDind wave marital_status
reshape wide marital_status, i(IDind) j(wave)

local ms_waves 1989 1991 1993 1997 2000 2004 2006 2009 2011 2015
local n_ms : word count `ms_waves'
forvalues k = 2/`n_ms' {
	local earlier : word `=`k' - 1' of `ms_waves'
	local current : word `k' of `ms_waves'
	replace marital_status`current' = marital_status`earlier' if marital_status`earlier' == 1 & ///
	                                                             mi(marital_status`current')
}

* back to long form, rebuild the person-wave key (IDind followed by the year)
* and store the result for merging
reshape long marital_status, i(IDind) j(wave)
tostring IDind, format(%100.0g) replace
generate newid = IDind + string(wave)
destring newid, replace
format newid %20.0f
rename marital_status marital_status_interpo
drop IDind wave
save marital_status_interpo.dta, replace
restore

* bring the filled series back; rows that exist only in the saved file are dropped
merge 1:1 newid using marital_status_interpo.dta, keep(master match) nogenerate
label variable marital_status_interpo "marital status(supplemented)"
label define marital_status_interpo 0 "never married" 1 "married" 
label values marital_status_interpo marital_status_interpo

*Since 2004, marital status has only appeared on adult questionnaires,
*so anyone under 18 who is still missing is coded as never married
replace marital_status_interpo = 0 if mi(marital_status_interpo) & age < 18

*current employment(including retired and rehired employees but not include persons under the age of 16)
* start from B2, keeping only values between 0 and 1
generate employed = B2 if inrange(B2, 0, 1)
* for waves 1993-2004: anyone coded 0 or missing who reports a positive
* value in B4, B5 or B6 is treated as employed
foreach jobvar of varlist B4 B5 B6 {
	replace employed = 1 if (employed == 0 | mi(employed)) & (`jobvar' > 0 & !mi(`jobvar')) & inrange(wave,1993,2004)
}
* persons younger than 16 are left out
replace employed = . if age < 16
label variable employed "current employment(including the farmers)"
label define employed 1 "be currently employed" 0 "be currently unemployed"
label values employed employed

*generate employment of first job last year(including retired and rehired employees but not include persons under the age of 16)
* employed_last1 already exists in the data. Missing values are set to 0 for
* persons who are currently not employed; persons under 16 are set to missing.
replace employed_last1 = 0 if mi(employed_last1) & employed == 0
replace employed_last1 = . if age < 16
la var employed_last1 "employment of first job last year(including the farmers)"
la def employed_last1 1 "be employed last year(first job)" 0 "be unemployed last year(first job)"
la val employed_last1 employed_last1

*generate employment of second job last year(including retired and rehired employees but not include persons under the age of 16)
* NOTE(review): the next line turns every existing 0 into 1 before 0 is
* assigned again below; the first-job block has no such step. Confirm how
* employed_last2 is coded upstream and that this recoding is intended.
replace employed_last2 = 1 if employed_last2 == 0
replace employed_last2 = 0 if mi(employed_last2) & employed == 0
replace employed_last2 = . if age < 16
la var employed_last2 "employment of second job last year(including the farmers)"
la def employed_last2 1 "be employed last year(second job)" 0 "be unemployed last year(second job)"
la val employed_last2 employed_last2

*working months / days per week / hours per day last year
*(suffix 1 = first job, 2 = second job; codes -9 and 0 become missing,
* and a reported 9 days per week is stored as 7)
foreach j in 1 2 {
	recode C3`j' (-9 0 = .), gen(workmonths`j')
}
foreach j in 1 2 {
	recode C5`j' (-9 0 = .)(9 = 7), gen(workdays`j')
}
foreach j in 1 2 {
	recode C6`j' (-9 0 = .), gen(workhours`j')
}

* persons younger than 16 are left out
foreach v of varlist workmonths1 workmonths2 workdays1 workdays2 workhours1 workhours2 {
	replace `v' = . if age < 16
}

* weekly hours last year = days per week x hours per day
foreach j in 1 2 {
	generate hours_lasty`j' = workdays`j' * workhours`j' if !mi(workdays`j', workhours`j', age) & age >= 16
}
* yearly hours = months x weeks per month (365/12/7) x days per week x hours per day
foreach j in 1 2 {
	generate worktime`j' = workmonths`j' * (365/12/7) * workdays`j' * workhours`j' ///
	                       if !mi(workmonths`j', workdays`j', workhours`j', age) & age >= 16
}

*working hours last week (codes -99, -9 and 0 become missing; under 16 left out)
foreach j in 1 2 {
	recode C7`j' (-99 -9 0 = .), gen(hours_lastw`j')
	replace hours_lastw`j' = . if age < 16
}

* more than 44 hours per week in the first job, last year and last week
generate overexertion_lasty = cond(mi(hours_lasty1), ., hours_lasty1 > 44)
generate overexertion_lastw = cond(mi(hours_lastw1), ., hours_lastw1 > 44)

* first job worked on all seven days of the week
generate no_weekend = cond(mi(workdays1), ., workdays1 == 7)

* variable labels
label variable workmonths1 "working months(first job, last year)"
label variable workmonths2 "working months(second job, last yer)"
label variable workdays1 "working days per week(first job, per week last year)"
label variable workdays2 "working days per week(second job, per week last year)"
label variable workhours1 "working hours per day(first job, per day last year)"
label variable workhours2 "working hours per day(second job, per day last year)"
label variable hours_lasty1 "working time per week last year(first job)"
label variable hours_lasty2 "working time per week last year(second job)"
label variable worktime1 "working time last year(first job)"
label variable worktime2 "working time last year(second job)"
label variable hours_lastw1 "working hours last week(first job)"
label variable hours_lastw2 "working hours last week(second job)"
label variable overexertion_lasty "over the legal weekly working hours last year"
label variable overexertion_lastw "over the legal weekly working hours last week"
label variable no_weekend "no weekends"

* each variable is attached to a value label carrying its own name
foreach v in workmonths1 workmonths2 workdays1 workdays2 workhours1 workhours2 ///
             hours_lasty1 hours_lasty2 worktime1 worktime2 hours_lastw1 hours_lastw2 ///
             overexertion_lasty overexertion_lastw no_weekend {
	label values `v' `v'
}

*generate monthly wage of first job last year(inflated to 2015)
* Positive wages of persons aged 16 or older are divided by i_CPI2015 and
* logged. A reported wage of exactly 0 is stored as log value 0; this is now
* restricted to the same age group as the positive wages, so that persons
* under 16 (or with missing age) no longer receive a value.
gen wage_1 = C81/i_CPI2015 if C81 > 0 & !mi(C81) & age >= 16 & !mi(age)
gen log_wage_1 = log(wage_1) 
replace log_wage_1 = 0 if C81 == 0 & age >= 16 & !mi(age)
la var log_wage_1 "monthly wage of first job(log)"
la val log_wage_1 log_wage_1

*generate monthly wage of second job last year(inflated to 2015)
gen wage_2 = C82/i_CPI2015 if C82 > 0 & !mi(C82) & age >= 16 & !mi(age)
gen log_wage_2 = log(wage_2)
replace log_wage_2 = 0 if C82 == 0 & age >= 16 & !mi(age)
la var log_wage_2 "monthly wage of second job(log)"
la val log_wage_2 log_wage_2

*gen annual bonuses of first job last year
gen bonuse_1 = I191/i_CPI2015 if I191 > 0 & !mi(I191) & age >= 16 & !mi(age)
gen log_bonuse_1 = log(bonuse_1)
* A deflated bonus below 1 has a negative log; it is floored at 0, which is
* the log of 1 and matches the floor used for the income variables.
* Setting it to 1 would instead assign the log of a bonus of about 2.7.
replace log_bonuse_1 = 0 if log_bonuse_1 < 0
replace log_bonuse_1 = 0 if I191 == 0 & age >= 16 & !mi(age)
la var log_bonuse_1 "annual bonuses of first job(log)"
la val log_bonuse_1 log_bonuse_1

*gen annual bonuses of second job last year
* NOTE(review): negative logs are not floored for the second job, unlike the
* first job - confirm whether the same floor should apply here.
gen bonuse_2 = I192/i_CPI2015 if I192 > 0 & !mi(I192) & age >= 16 & !mi(age)
gen log_bonuse_2 = log(bonuse_2)
replace log_bonuse_2 = 0 if I192 == 0 & age >= 16 & !mi(age)
la var log_bonuse_2 "annual bonuses of second job"
la val log_bonuse_2 log_bonuse_2

*log of individual income, total household income and per capita household income
* Each source variable is first floored at 1 in place (values below 1,
* including negatives, become 1), then its natural log is taken.
local raw_vars indinc_cpi hhinc_cpi hhincpc_cpi
local log_vars log_income_i log_fam_income log_pc_income
forvalues k = 1/3 {
	local raw : word `k' of `raw_vars'
	local lg  : word `k' of `log_vars'
	replace `raw' = 1 if `raw' < 1
	generate `lg' = log(`raw') if !mi(`raw')
}

label variable log_income_i "annual individual income(log)"
label variable log_fam_income "total household income(log)"
label variable log_pc_income "per capita household income(log)"
foreach v in log_income_i log_fam_income log_pc_income {
	label values `v' `v'
}

*generate occupation
* B4 is grouped into seven categories; -9 and 16 become missing and any code
* not listed in a rule is copied unchanged.
recode B4 (-9 = .) ///
          (1/2 12 = 1 "professional or technical worker") ///
          (3/4=2 "officer") ///
          (5=3 "farmer fisherman hunter") ///
          (6=4 "skilled worker") ///
          (7=5 "non-skilled worker") ///
          (10/11=6 "service worker") ///
          (13/15=7 "other(including homemaker)") ///
          (16=.), gen(occupation)
* persons younger than 16 are left out
replace occupation = . if age < 16
tabulate occupation, gen(occ)
label variable occupation "primary occupation"
label values occupation occupation

* dummies derived from occupation (missing when occupation is missing)
generate non_skilled = cond(mi(occupation), ., occupation == 5)
label variable non_skilled "be non-skilled worker(occupation)"
label values non_skilled non_skilled

generate service_worker = cond(mi(occupation), ., occupation == 6)
label variable service_worker "be service-worker(occupation)"
label values service_worker service_worker

generate non_skilled_service = cond(mi(occupation), ., inlist(occupation,5,6))
label variable non_skilled_service "be non-skilled & service worker(occupation)"
label values non_skilled_service non_skilled_service

*generate position of first job
* B5 is grouped into four categories. Codes 8 and 9 are set to missing, and
* so is -9, the missing code that the B4 and B6 recodes in this file also
* remove. Without it a -9 would be copied into position unchanged and get
* its own dummy from tab. Persons younger than 16 are left out.
recode B5 (-9 8 9 =.)(1 2 = 1 "self-employed")(3 = 2 "works for another person or enterprise")(4 = 3 "contractor")(5/7 = 4 "temporary worker"),gen(position)
replace position = . if age < 16
tab position, gen(posit)

* dummies derived from position (missing when position is missing)
gen stable_worker = position == 2 if !mi(position)
la var stable_worker "be stable worker(position)"
la val stable_worker stable_worker

gen temporary_worker = position == 4 if !mi(position)
la var temporary_worker "be temporary worker(position)"
la val temporary_worker temporary_worker

gen temporary_contractor = inlist(position,3,4) if !mi(position)
la var temporary_contractor "be temporary contractor(position)"
la val temporary_contractor temporary_contractor

*type of work unit
* B6 is grouped into five categories; -9 and 9 become missing and persons
* younger than 16 are left out.
recode B6 (-9 9 = .) ///
          (1 = 1 "government") ///
          (2/3 = 2 "state-owned institute or state-owned enterprise") ///
          (4/5 = 3 "collective enterprise") ///
          (6 = 4 "Family contract farming") ///
          (7/8 = 5 "Private, individual enterprise or Three-capital enterprise"), gen(work_unit)
replace work_unit = . if age < 16
tabulate work_unit, gen(unit)
label variable work_unit "work unit"
label values work_unit work_unit

*farmers among migrants and urban residents
* 1 when the person is not in migrant group 2 (rural residents), has
* occupation 3 (farmer) and work unit 4 (family contract farming);
* missing when any of the three inputs is missing.
generate be_farmer = m_group != 2 & occupation == 3 & work_unit == 4 if !mi(m_group, occupation, work_unit)
label variable be_farmer "be migrant and urban farmer"
label values be_farmer be_farmer

*health insurance
*Anyone who ticks at least one insurance scheme (M3A_0 ... M3A_12) is treated
*as insured when the direct question M1 gives no usable answer.
recode M1 (1 = 1 "have health insurance") (0 = 0 "do not have health insurance")(9 = .),gen(health_insurance)

* build the condition: at least one of the thirteen scheme items equals 1
local any_scheme "M3A_0 == 1"
forvalues s = 1/12 {
	local any_scheme "`any_scheme' | M3A_`s' == 1"
}
replace health_insurance = 1 if (health_insurance == . | health_insurance == 9) & wave != 1989 & (`any_scheme')

* the variable is not used for wave 1989
replace health_insurance = . if wave == 1989

*generate education variable
*dealing with the missing values of formal education
gen formal_edu = A11 
replace formal_edu = . if formal_edu == -9
preserve 
keep IDind wave formal_edu
reshape wide formal_edu,i(IDind) j(wave)

*interpolate missing vale of wave 1991
replace formal_edu1991 = formal_edu1993 if formal_edu1989 == formal_edu1993 & ///
                                           mi(formal_edu1991) & !mi(formal_edu1993)
replace formal_edu1991 = formal_edu1997 if formal_edu1989 == formal_edu1997 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   !mi(formal_edu1997)
replace formal_edu1991 = formal_edu2000 if formal_edu1989 == formal_edu2000 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & !mi(formal_edu2000)
replace formal_edu1991 = formal_edu2004 if formal_edu1989 == formal_edu2004 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & mi(formal_edu2000) & ///
										   !mi(formal_edu2004)
replace formal_edu1991 = formal_edu2006 if formal_edu1989 == formal_edu2006 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & !mi(formal_edu2006)
replace formal_edu1991 = formal_edu2009 if formal_edu1989 == formal_edu2009 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   !mi(formal_edu2009)
replace formal_edu1991 = formal_edu2011 if formal_edu1989 == formal_edu2011 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   mi(formal_edu2009) & !mi(formal_edu2011)
replace formal_edu1991 = formal_edu2015 if formal_edu1989 == formal_edu2015 & ///
                                           mi(formal_edu1991) & mi(formal_edu1993) & ///
										   mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   mi(formal_edu2009) & mi(formal_edu2011) & ///
										   !mi(formal_edu2015)

*interpolate missing vale of wave 1993
replace formal_edu1993 = formal_edu1997 if formal_edu1991 == formal_edu1997 & ///
                                           mi(formal_edu1993) & !mi(formal_edu1997)
replace formal_edu1993 = formal_edu2000 if formal_edu1991 == formal_edu2000 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   !mi(formal_edu2000)
replace formal_edu1993 = formal_edu2004 if formal_edu1991 == formal_edu2004 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   mi(formal_edu2000) & !mi(formal_edu2004)
replace formal_edu1993 = formal_edu2006 if formal_edu1991 == formal_edu2006 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   mi(formal_edu2000) & mi(formal_edu2004) & ///
										   !mi(formal_edu2006)
replace formal_edu1993 = formal_edu2009 if formal_edu1991 == formal_edu2009 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   mi(formal_edu2000) & mi(formal_edu2004) & ///
										   mi(formal_edu2006) & !mi(formal_edu2009)
replace formal_edu1993 = formal_edu2011 if formal_edu1991 == formal_edu2011 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   mi(formal_edu2000) & mi(formal_edu2004) & ///
										   mi(formal_edu2006) & mi(formal_edu2009) & ///
										   !mi(formal_edu2011)
replace formal_edu1993 = formal_edu2015 if formal_edu1991 == formal_edu2015 & ///
                                           mi(formal_edu1993) & mi(formal_edu1997) & ///
										   mi(formal_edu2000) & mi(formal_edu2004) & ///
										   mi(formal_edu2006) & mi(formal_edu2009) & ///
										   mi(formal_edu2011) & !mi(formal_edu2015)

*interpolate missing vale of wave 1997
replace formal_edu1997 = formal_edu2000 if formal_edu1993 == formal_edu2000 & ///
                                           mi(formal_edu1997) & !mi(formal_edu2000)
replace formal_edu1997 = formal_edu2004 if formal_edu1993 == formal_edu2004 & ///
                                           mi(formal_edu1997) & mi(formal_edu2000) & ///
										   !mi(formal_edu2004)
replace formal_edu1997 = formal_edu2006 if formal_edu1993 == formal_edu2006 & ///
                                           mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & !mi(formal_edu2006)
replace formal_edu1997 = formal_edu2009 if formal_edu1993 == formal_edu2009 & ///
                                           mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   !mi(formal_edu2009)
replace formal_edu1997 = formal_edu2011 if formal_edu1993 == formal_edu2011 & ///
                                           mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   mi(formal_edu2009) & !mi(formal_edu2011)
replace formal_edu1997 = formal_edu2015 if formal_edu1993 == formal_edu2015 & ///
                                           mi(formal_edu1997) & mi(formal_edu2000) & ///
										   mi(formal_edu2004) & mi(formal_edu2006) & ///
										   mi(formal_edu2009) & mi(formal_edu2011) & ///
										   !mi(formal_edu2015)

*interpolate missing values of formal_edu for waves 2000, 2004, 2006, 2009 and 2011
*rule: a missing value in the target wave is filled from the first later wave
*      that is non-missing (the donor), but only when the donor equals the value
*      of the wave immediately before the target (the anchor)
*targets are processed in chronological order, so an anchor may itself have
*been filled by an earlier step
local fe_waves 1997 2000 2004 2006 2009 2011 2015
local fe_n : word count `fe_waves'
local fe_last = `fe_n' - 1

forvalues t = 2/`fe_last' {
    local a  = `t' - 1
    local d0 = `t' + 1
    local anchor : word `a' of `fe_waves'
    local target : word `t' of `fe_waves'

    *skipped accumulates the mi() conditions for waves between target and donor
    local skipped
    forvalues k = `d0'/`fe_n' {
        local donor : word `k' of `fe_waves'
        replace formal_edu`target' = formal_edu`donor' ///
            if formal_edu`anchor' == formal_edu`donor' ///
             & mi(formal_edu`target') `skipped' ///
             & !mi(formal_edu`donor')
        local skipped `skipped' & mi(formal_edu`donor')
    }
}

*back to one row per person-wave, then rebuild the newid key
*(IDind digits followed by the wave year) so the result can be merged
reshape long formal_edu, i(IDind) j(wave)
tostring IDind, format(%100.0g) replace
generate newid = IDind + string(wave)
destring newid, replace
format newid %20.0f
rename formal_edu formal_edu_interpo
drop IDind wave
save formal_edu_interpo.dta, replace
restore

*attach the interpolated variable; rows found only in the using file
*(person-wave cells created by the reshape) are discarded
merge 1:1 newid using formal_edu_interpo.dta
keep if inlist(_merge, 1, 3)
drop _merge

*dealing with the missing values of highest academic qualification (A12)
*code 9 is set to missing before interpolating
*NOTE(review): 9 is presumably the unknown code - confirm against the codebook
generate highest_quali = A12
replace highest_quali = . if highest_quali == 9
preserve
keep IDind wave highest_quali
reshape wide highest_quali, i(IDind) j(wave)

*interpolate missing values of waves 1991 to 2011
*rule: a missing value in the target wave is filled from the first later wave
*      that is non-missing (the donor), but only when the donor equals the value
*      of the wave immediately before the target (the anchor)
*targets are processed in chronological order, so an anchor may itself have
*been filled by an earlier step; 1989 and 2015 are never filled
local hq_waves 1989 1991 1993 1997 2000 2004 2006 2009 2011 2015
local hq_n : word count `hq_waves'
local hq_last = `hq_n' - 1

forvalues t = 2/`hq_last' {
    local a  = `t' - 1
    local d0 = `t' + 1
    local anchor : word `a' of `hq_waves'
    local target : word `t' of `hq_waves'

    *skipped accumulates the mi() conditions for waves between target and donor
    local skipped
    forvalues k = `d0'/`hq_n' {
        local donor : word `k' of `hq_waves'
        replace highest_quali`target' = highest_quali`donor' ///
            if highest_quali`anchor' == highest_quali`donor' ///
             & mi(highest_quali`target') `skipped' ///
             & !mi(highest_quali`donor')
        local skipped `skipped' & mi(highest_quali`donor')
    }
}

*back to one row per person-wave, then rebuild the newid key
*(IDind digits followed by the wave year) so the result can be merged
reshape long highest_quali, i(IDind) j(wave)
tostring IDind, format(%100.0g) replace
generate newid = IDind + string(wave)
destring newid, replace
format newid %20.0f
rename highest_quali highest_quali_interpo
drop IDind wave
save highest_quali_interpo.dta, replace
restore

*attach the interpolated variable; rows found only in the using file
*(person-wave cells created by the reshape) are discarded
merge 1:1 newid using highest_quali_interpo.dta
keep if inlist(_merge, 1, 3)
drop _merge

*generate years of education from the interpolated formal education code
*codes not listed in the rules are copied unchanged by recode
recode formal_edu_interpo ///
    (0=0 "no formal education") ///
    (11=1)(12=2)(13=3)(14=4)(15=5)(16=6) ///
    (21=7)(22=8)(23=9) ///
    (24=10)(25=11)(26=12) ///
    (27=10)(28=11)(29=12) ///
    (31=13)(32=14)(33=15)(34=16)(35=17)(36=18), generate(year_education)

*where years are still missing, fall back on the interpolated highest
*qualification; the n-th word of hq_years is the value used for code n-1
local hq_years 0 6 9 12 12 16 18
forvalues q = 0/6 {
    local pos = `q' + 1
    local yrs : word `pos' of `hq_years'
    replace year_education = `yrs' if year_education == . & highest_quali_interpo == `q'
}

*generate the six-level degree of education from the same formal education code
recode formal_edu_interpo ///
    (0 = 1 "no formal education") ///
    (11/16 = 2 "primary school") ///
    (21/23 = 3 "lower middle school") ///
    (24/26 = 4 "upper middle school") ///
    (27/29 = 5 "secondary professional technical schools") ///
    (31/36 = 6 "undergraduate or higher"), generate(degree_education)

*same fallback: qualification codes 0 to 4 map to levels 1 to 5,
*codes 5 and 6 both map to level 6
forvalues q = 0/4 {
    local lvl = `q' + 1
    replace degree_education = `lvl' if degree_education == . & highest_quali_interpo == `q'
}
replace degree_education = 6 if degree_education == . & inlist(highest_quali_interpo, 5, 6)

*restrict the sample to waves after 1991, then create one indicator per
*remaining wave (year1, year2, ...)
keep if wave > 1991
tabulate wave, generate(year)

*one indicator per value of T1 (prov1, prov2, ...)
tabulate T1, generate(prov)

*drop respondents with missing age
*(the original note reports 39 observations)
drop if age == .

*province-wave key: T1 digits followed by the wave year, stored as a number
generate pro_id = string(T1) + string(wave)
destring pro_id, replace
format pro_id %20.0f

*attach the province-level data; only matched observations are kept
merge m:1 pro_id using "${mypath}\CHNS_project\01_data\01_CHNS\province.dta", keep(match) nogenerate

*final variable list, written once and used for both keep and order so the
*two commands cannot drift apart
local finalvars ///
    newid homeid IDind hhid fa_id mo_id sp_id wave year1-year8 T1 prov1-prov12 T2 relation A5B A5D A8B ethnicity ///
    gender age agegroup ag1-ag11 hukou_interpo be_migrant m_group mg2 mg3 marital_status_interpo ///
    employed employed_last1 employed_last2 workmonths1 workmonths2 workdays1 workdays2 workhours1 workhours2 ///
    hours_lasty1 hours_lasty2 worktime1 worktime2 overexertion_lasty overexertion_lastw no_weekend ///
    hours_lastw1 hours_lastw2 log_wage_1 log_wage_2 log_bonuse_1 log_bonuse_2 log_income_i ///
    log_fam_income log_pc_income occupation non_skilled service_worker non_skilled_service ///
    position stable_worker temporary_worker temporary_contractor work_unit be_farmer health_insurance ///
    year_education degree_education be_student living be_at_home HAI household_size household_group ///
    h_group1-h_group5 num_child num_elderly pcgdp child_elder percent_farmer population service_sector

*keep only the analysis variables, then put them in the listed order
keep `finalvars'
order `finalvars'
	 
*give the analysis variables their final names, then save the data set
*the official rename group syntax is used instead of the user-written
*renames command, so the script does not stop here, before the final save,
*on a machine where that SSC package is not installed
*old and new names are matched by position (43 names on each side)
local oldnames hukou_interpo m_group employed prov1-prov12 mg2 mg3 ag1-ag11 T2 A5B A5D A8B ///
               year1-year8 marital_status_interpo overexertion_lasty overexertion_lastw
local newnames hukou migrant_group be_employed ///
               Beijing Liaoning Heilongjiang Shanghai Jiangsu Shandong Henan ///
               Hubei Hunan Guangxi Guizhou Chongqing ///
               be_rural_resident be_urban_resident ///
               age18_below age18_25 age26_30 age31_35 age36_40 age41_45 ///
               age46_50 age51_55 age56_60 age61_65 age65_above ///
               location fat_no mot_no spo_no ///
               year1993 year1997 year2000 year2004 year2006 year2009 year2011 year2015 ///
               marital_status over_lasty over_lastw

*expand the ranges (prov1-prov12, ag1-ag11, year1-year8) into explicit names
unab oldnames : `oldnames'
rename (`oldnames') (`newnames')

save CHNS_1993_2015.dta, replace
log close
